In [181]:
import numpy as np
from math import pi 
import scipy
from scipy import signal
from matplotlib import pyplot as plt
from scipy.io.wavfile import write

Question 1

In [182]:
f1 = 900 #formant frequency
b1 = 200 #bandwith
fs = 16000 #sampling frequency
T = 1.0/fs #samples

Computing the filter parameters

We have the following equations :

$r_i = e^{-B_i \pi T} $

$\theta_i = 2\pi F_i T$

In [183]:
r = np.exp(-b1*pi*T) #calculating the coefficient 'r' from formula
r
Out[183]:
0.9614911598014075
In [184]:
theta = 2*pi*f1*T #calculating the angle of the poles from formula
theta
Out[184]:
0.3534291735288517

Pole zero plot of H(z)

The poles of H(z) are at $(r_i, \theta_i)$ and $(r_i, -\theta_i)$

In [185]:
plt.rcParams['figure.dpi'] = 600
plt.figure(figsize=(12, 12))
plt.plot(r*np.cos(theta), r*np.sin(theta), 'bx') #x axis and y axis coordinates of the poles
plt.plot(r*np.cos(-theta), r*np.sin(-theta), 'bx')#x axis and y axis coordinates of the poles
angle = np.linspace( 0 , 2 * np.pi , 150 ) 

radius = 1

x = radius * np.cos( angle ) 
y = radius * np.sin( angle ) 
plt.plot(x, y, 'r', alpha = 0.2)

plt.axvline(0, c = 'r', ls='--', alpha=0.2)
plt.axhline(0, c = 'r', ls='--', alpha=0.2)
plt.plot(0, 0, 'ro')
plt.legend(["The poles of H(z)"])
Out[185]:
<matplotlib.legend.Legend at 0x209b5d0db70>

H(z)

We know that :

$H(z) = \frac{1}{1 -2rcos(\theta)z^{-1}+ r^2z^{-2}}$

In [186]:
def getFilterDenCoef(F1, B1, Fs):
    #gives the coeffiecients of the denominator of the transfer function i.e
    # denominator is 1 -2*r*np.cos(theta)(z^-1) + r*r(z^-2) so returns the array [1, -2*r*np.cos(theta), r*r]
    r, theta = np.exp(-b1*pi*T), 2*pi*f1*T
    return np.array([1, -2*r*np.cos(theta), r*r])
In [187]:
num = np.array([1])
den = getFilterDenCoef(f1, b1, fs)

Displaying H(z)

In [188]:
s = "{a0} + {a1} z^-1 + {a2} z^-2".format(a0 = den[0], a1 = den[1], a2 = den[2])
print("H(z) = \n")
print(" "*(len(s)//2) + "1\n" + "-"*len(s)+"\n"+s)
H(z) = 

                            1
--------------------------------------------------------
1.0 + -1.8041253513834825 z^-1 + 0.9244652503762558 z^-2
In [189]:
w, h = scipy.signal.freqz(b=num, a=den, fs = fs)#returns the value of the z transform(h) and the corresponding frequency (w)

Magnitude (dB vs Frequency)

From formula

In [190]:
plt.figure(figsize=(15, 8))
plt.plot(w, 20*np.log(np.sqrt(h*np.conjugate(h)))) #magnitude is sqrt(hxh*)
plt.xlabel("Frequency")
plt.ylabel("dB(Magnitude)")
plt.title("Magnitude plot of H(z) dB vs Frequency")
plt.grid(True)
C:\Users\HP\AppData\Roaming\Python\Python310\site-packages\matplotlib\cbook\__init__.py:1298: ComplexWarning: Casting complex values to real discards the imaginary part
  return np.asarray(x, float)

Using the scipy functions for magnitude response

In [191]:
w, mag, phase = scipy.signal.dbode((num, den, 1))#returns the frequency and the corresponding magnitude and phase
In [192]:
plt.figure()
plt.plot(w*fs/(2*pi), mag)    # Bode magnitude plot
plt.xlabel("Frequency")
plt.ylabel("Amplitude in dB")
plt.title("Magnitude response of the system")
plt.figure()
plt.plot(w*fs/(2*pi), phase)  # Bode phase plot
plt.xlabel("Frequency")
plt.ylabel("Phase(Degrees)")
plt.title("Phase response of the system")
plt.show()

Impulse response

Since $H(z) = \frac{1}{1 -2rcos(\theta)z^{-1}+ r^2z^{-2}}$

=> $\frac{Y(z)}{X(z)} = \frac{1}{1 -2rcos(\theta)z^{-1}+ r^2z^{-2}}$

=> $Y(z)(1 -2rcos(\theta)z^{-1}+ r^2z^{-2}) = X(z)$

=> $y[n] -2rcos(\theta)y[n-1] + r^2y[n-2] = x[n]$

Note : We always assume that the system output is casual i.e. y[n] = 0 for n<0

For impulse response we need to excite the above with the signal $\delta[n]$

In [193]:
def computeImpRes(ini, samples, r, theta):
    #computing the impulse response from the difference equations
    y = np.zeros((samples, 1))
    y[0], y[1], y[2] = ini #intiliazes the  first three samples of the output according to ini(passed by user)
    for i in range(3, samples):
        y[i] = 2*r*np.cos(theta)*y[i-1] - r*r*y[i-2] #difference equation generated from H(z)
    return y
In [194]:
#initial conditions come from assuming y is casual => y[-n] = 0 for n<0
#y[0] = 1 (since input is impulse), y[1] = 2*r*np.cos(theta)*y[0], y[2] = 2*r*np.cos(theta)*y[1] - r*r*y[0]
yImp = computeImpRes([1, 2*r*np.cos(theta), 4*r*r*np.cos(theta)*np.cos(theta) - r*r], 200, r, theta)
In [195]:
plt.figure(figsize=(15, 12))
t = [i/fs for i in range(len(yImp))] #getting the time samples from the samples i.e mutlipying by  1/fs (sampling time)
plt.plot(t, yImp, '.')
plt.axhline(0, c = 'r', alpha=0.3)
plt.xlabel("Time(seconds)")
plt.ylabel("Amplitude of signal")
plt.title("Impulse response in time domain")
Out[195]:
Text(0.5, 1.0, 'Impulse response in time domain')

Question 2

Here Part(a), (b) and (c) are the same while only the formants change. Hence, I have only explained Part(a).

Like before,

$H(z) = \frac{1}{1 -2rcos(\theta)z^{-1}+ r^2z^{-2}}$

=> $\frac{Y(z)}{X(z)} = \frac{1}{1 -2rcos(\theta)z^{-1}+ r^2z^{-2}}$

=> $Y(z)(1 -2rcos(\theta)z^{-1}+ r^2z^{-2}) = X(z)$

=> $y[n] -2rcos(\theta)y[n-1] + r^2y[n-2] = x[n]$

Note : We always assume that the system output is casual i.e. y[n] = 0 for n<0

But here, x[n] is the triangular input. I first approximated the peaks of the triangular signal by int(fs/f0) since the signal repeats every 1/f0 seconds. But each sample is 1/fs long => 1/fs * P = 1/f0 where P is the integer number of samples elapsed before the end of 1/f0 seconds. Thus, every P samples the traingular peaks occur. Then, we go width lenght above and below each of such peaks and populate them with appropriate values to get the triangular impulse

In [196]:
f1 = 900 #formant frequency
b1 = 200 #bandwith
fs = 16000 #sampling frequency
T = 1.0/fs #samples
In [197]:
r = np.exp(-b1*pi*T)
theta = 2*pi*f1*T #computing the pole parameters from formulae
In [198]:
F0 = 160
dur = 0.02 #setting duration of the system
timeSpace = np.linspace(0, dur, int(dur*fs))
P = fs//F0 ##approximating location of the peaks of the triangular impulse train
samp = len(timeSpace) #number of samples generated since fs is fixed
width = 2#no of samples on either side of the peak for the triangular input
In [199]:
def computeResponse(x, samples, r, theta):
    #computing the impulse response from the difference equations given x(input)
    assert x.shape[0]==samples
    y = np.zeros((samples, 1))
    y[0] = x[0] #assumed casual
    y[1] = x[1] + 2*r*np.cos(theta)*y[0]
    y[2] = x[2] + 2*r*np.cos(theta)*y[1] - r*r*y[0]
    for i in range(3, samples):
        y[i] = x[i] + 2*r*np.cos(theta)*y[i-1] - r*r*y[i-2]
    return y
In [200]:
def triagInput(P, samples, width = 4, start = 0):
    #start = {0, 1} whether to start at 0 or the next non-zero sample
    #see the explaination of how i generated this function above!
    x = np.zeros((samples, 1))
    peaks= []
    for i in range(start, samples+start):
        if i%P==0:#peaks at the integer multiples of int(fs/f0)
            x[i] = 1 #populating the peaks
            peaks.append(i)
    for j in peaks:
        for k in range(width):
            if j-k>=0:
                x[j-k] = 1 - (k/width)#populate for values less than the peak
            if j+k<samples:
                x[j+k] = 1 - (k/width)#populate for values above than the peak
    return x
In [201]:
plt.subplot(1, 2, 1)
plt.plot(timeSpace, triagInput(P, samp, width = width))
plt.xlabel("Time in seconds")
plt.ylabel("Amplitude")
plt.title("Triangular impulse train")
plt.subplot(1, 2, 2)
plt.plot(timeSpace, computeResponse(triagInput(P, samp, width = width), samp, r, theta))
plt.xlabel("Time in seconds")
plt.title("System output of source-filter")
Out[201]:
Text(0.5, 1.0, 'System output of source-filter')
In [202]:
plt.plot(timeSpace, triagInput(P, samp, width = width))
plt.plot(timeSpace, computeResponse(triagInput(P, samp, width = 4), samp, r, theta))
plt.axvline(1/F0, linestyle = '--', alpha = 0.3, linewidth = 1)
plt.legend(["Triangular impulse train", "System output of source-filter"],fontsize=6)
plt.xlabel("Time in seconds")
plt.ylabel("Amplitude")
plt.title("Output of source-filter system")
Out[202]:
Text(0.5, 1.0, 'Output of source-filter system')
In [203]:
F0 = 160
dur = 0.5
timeSpace = np.linspace(0, dur, int(dur*fs))
P = fs//F0
samp = len(timeSpace)
width = 2#no of samples on either side of the peak for the triangular input
y = computeResponse(triagInput(P, samp, width = 4), samp, r, theta)#computing the output
scaled = np.int16(y/np.max(np.abs(y)) * 32767)#scaling the output to 16bit integers for writing to wav file
write('qn2.wav', fs, scaled)

Comments on Qn2 :

Please listen to qn2.wav file for the audio

  1. Waveforms : We see F0 manifest as the pitch of the sound, that is, the entire 1/F0 = 0.00625 seconds duration window repeats F0 times in a second. (We have a triangular pulse now so components of the next pulse manifest in the previous hence we see that the output signal rises above a littl ebefore this!) F1 is the frequency of the oscillations in this window. We see that is is decaying and this decay is due to our impulse repsonse.

  2. We hear a constant low pitch 'a' sound

Question 3

We follow the same steps as Qn2(a) but vary f1, b1 or f0!

Part 1

In [204]:
f1 = 300 #formant frequency
b1 = 100 #bandwith
fs = 16000 #sampling frequency
T = 1.0/fs #samples
In [205]:
F0 = 120
dur = 0.02
timeSpace = np.linspace(0, dur, int(dur*fs))
P = fs//F0
samp = len(timeSpace)
width = 2#no of samples on either side of the peak for the triangular input
In [206]:
r = np.exp(-b1*pi*T)
theta = 2*pi*f1*T
In [207]:
plt.subplot(1, 2, 1)
plt.plot(timeSpace, triagInput(P, samp, width = width))
plt.xlabel("Time in seconds")
plt.ylabel("Amplitude")
plt.title("Triangular impulse train")
plt.subplot(1, 2, 2)
plt.plot(timeSpace, computeResponse(triagInput(P, samp, width = width), samp, r, theta))
plt.xlabel("Time in seconds")
plt.title("System output of source-filter")
Out[207]:
Text(0.5, 1.0, 'System output of source-filter')
In [208]:
plt.plot(timeSpace, triagInput(P, samp, width = width))
plt.plot(timeSpace, computeResponse(triagInput(P, samp, width = 4), samp, r, theta))
plt.axvline(1/F0, linestyle = '--', alpha = 0.3, linewidth = 1)
plt.legend(["Triangular impulse train", "System output of source-filter"],fontsize=6)
plt.xlabel("Time in seconds")
plt.ylabel("Amplitude")
plt.title("Output of source-filter system")
Out[208]:
Text(0.5, 1.0, 'Output of source-filter system')
In [209]:
F0 = 120
dur = 0.5
timeSpace = np.linspace(0, dur, int(dur*fs))
P = fs//F0
samp = len(timeSpace)
width = 2#no of samples on either side of the peak for the triangular input
y = computeResponse(triagInput(P, samp, width = 4), samp, r, theta)#computing the output
scaled = np.int16(y/np.max(np.abs(y)) * 32767)#scaling the output to 16bit integers for writing to wav file
write('qn3a.wav', fs, scaled)

Part 2

In [210]:
f1 = 1100 #formant frequency
b1 = 200 #bandwith
fs = 16000 #sampling frequency
T = 1.0/fs #samples
In [211]:
F0 = 120
dur = 0.02
timeSpace = np.linspace(0, dur, int(dur*fs))
P = fs//F0
samp = len(timeSpace)
width = 2#no of samples on either side of the peak for the triangular input
In [212]:
r = np.exp(-b1*pi*T)
theta = 2*pi*f1*T
In [213]:
plt.subplot(1, 2, 1)
plt.plot(timeSpace, triagInput(P, samp, width = width))
plt.xlabel("Time in seconds")
plt.ylabel("Amplitude")
plt.title("Triangular impulse train")
plt.subplot(1, 2, 2)
plt.plot(timeSpace, computeResponse(triagInput(P, samp, width = width), samp, r, theta))
plt.xlabel("Time in seconds")
plt.title("System output of source-filter")
Out[213]:
Text(0.5, 1.0, 'System output of source-filter')
In [214]:
plt.plot(timeSpace, triagInput(P, samp, width = width))
plt.plot(timeSpace, computeResponse(triagInput(P, samp, width = 4), samp, r, theta))
plt.legend(["Triangular impulse train", "System output of source-filter"],fontsize=6)
plt.xlabel("Time in seconds")
plt.ylabel("Amplitude")
plt.title("Output of source-filter system")
Out[214]:
Text(0.5, 1.0, 'Output of source-filter system')
In [215]:
F0 = 120
dur = 0.5
timeSpace = np.linspace(0, dur, int(dur*fs))
P = fs//F0
samp = len(timeSpace)
width = 2#no of samples on either side of the peak for the triangular input
y = computeResponse(triagInput(P, samp, width = 4), samp, r, theta)#computing the output
scaled = np.int16(y/np.max(np.abs(y)) * 32767)#scaling the output to 16bit integers for writing to wav file
write('qn3b.wav', fs, scaled)

Part 3

In [216]:
f1 = 300 #formant frequency
b1 = 100 #bandwith
fs = 16000 #sampling frequency
T = 1.0/fs #samples
In [217]:
F0 = 180
dur = 0.02
timeSpace = np.linspace(0, dur, int(dur*fs))
P = fs//F0
samp = len(timeSpace)
width = 2#no of samples on either side of the peak for the triangular input
In [218]:
r = np.exp(-b1*pi*T)
theta = 2*pi*f1*T
In [219]:
plt.subplot(1, 2, 1)
plt.plot(timeSpace, triagInput(P, samp, width = width))
plt.xlabel("Time in seconds")
plt.ylabel("Amplitude")
plt.title("Triangular impulse train")
plt.subplot(1, 2, 2)
plt.plot(timeSpace, computeResponse(triagInput(P, samp, width = width), samp, r, theta))
plt.xlabel("Time in seconds")
plt.title("System output of source-filter")
Out[219]:
Text(0.5, 1.0, 'System output of source-filter')
In [220]:
plt.plot(timeSpace, triagInput(P, samp, width = width))
plt.plot(timeSpace, computeResponse(triagInput(P, samp, width = 4), samp, r, theta))
plt.legend(["Triangular impulse train", "System output of source-filter"],fontsize=6)
plt.xlabel("Time in seconds")
plt.ylabel("Amplitude")
plt.title("Output of source-filter system")
Out[220]:
Text(0.5, 1.0, 'Output of source-filter system')
In [221]:
F0 = 180
dur = 0.5
timeSpace = np.linspace(0, dur, int(dur*fs))
P = fs//F0
samp = len(timeSpace)
width = 2#no of samples on either side of the peak for the triangular input
y = computeResponse(triagInput(P, samp, width = 4), samp, r, theta)#computing the output
scaled = np.int16(y/np.max(np.abs(y)) * 32767)#scaling the output to 16bit integers for writing to wav file
write('qn3c.wav', fs, scaled)

Comments on Qn3

Please listen to qn3a.wav for (a) part, qn3b.wav file for the (b) part and qn3c.wav file for the (c) part

Comparison of the sounds

  1. (a) and (c) have the same formant frequency but different F0 => (c) appears to sound the same as (a) but (c) has a higher pitch than (a)
  2. (b) has the same F0 as (a) but different formant frequency implying that the sound heard is different as is the case but the pitch of the voice is th same.

Comparison of the waveforms:

  1. (a) and (c) have the same F1 and thus the oscillations in every (1/F0) window are the same. However F0 for (c) is higher, implying that these windows appear every 1/180 seconds compared to every 1/120 seconds for (a). Thus, in the case of (a) the signal attenuates more since the duration of 1 period is more as compared to (c) where the period is smaller and so less attenuation occurs.
  2. In the case of (b), it has the same F0 as (a) thus implying that each 1/F0 period occurs at the same time for both i.e pitch is the same but the number of oscillations per period in the case of (b) is much higher than that in (a) since F1 for (b) is larger than (a). Thus, although F0 is the same (b) appears to have more attenuation simply becuase the peaks appear more often due to the larger F1 but (a) and (b) both have the same extent of attenuation.

Question 4

Again $H_1(z) = \frac{1}{1 -2r_1cos(\theta_1)z^{-1}+ r_1^2z^{-2}}$

=> $\frac{Y(z)}{X(z)} = \frac{1}{1 -2r_1cos(\theta)z^{-1}+ r_1^2z^{-2}}$

=> $Y(z)(1 -2r_1cos(\theta_1)z^{-1}+ r_1^2z^{-2}) = X(z)$

=> $y[n] -2r_1cos(\theta_1)y[n-1] + r_1^2y[n-2] = x[n]$

Note : We always assume that the system output is casual i.e. y[n] = 0 for n<0

Thus, we can generate y[n]. Now this acts as a input to the next system which is of the same form as $H_1(z)$ i.e. $H_2(z) = \frac{1}{1 -2r_2cos(\theta_2)z^{-1}+ r_2^2z^{-2}}$

Thus, we can keep on doing this to cascade more and more systems

/a/ @ 120Hz

In [222]:
f1 = 730 #formant frequency
f2 = 1090
f3 = 2440
b1 = 100 #bandwith
fs = 16000 #sampling frequency
T = 1.0/fs #samples
In [223]:
F0 = 120
dur = 0.01
timeSpace = np.linspace(0, dur, int(dur*fs))
P = fs//F0
samp = len(timeSpace)
width = 2#no of samples on either side of the peak for the triangular input
In [224]:
r1 = np.exp(-b1*pi*T)
theta1 = 2*pi*f1*T
r2 = np.exp(-b1*pi*T)
theta2 = 2*pi*f2*T
r3 = np.exp(-b1*pi*T)
theta3 = 2*pi*f3*T #computing the paramters for each formant since we are going to use cascade
In [225]:
y1 = computeResponse(triagInput(P, samp, width = width), samp, r1, theta1)#cascade the systems together
#yI is input to the next cascaded system 
y2 = computeResponse(y1, samp, r2, theta2)
#y2 is input to the next cascaded system 
y3 = computeResponse(y1, samp, r3, theta3)
In [226]:
plt.subplot(1, 2, 1)
plt.plot(timeSpace, triagInput(P, samp, width = width))
plt.xlabel("Time in seconds")
plt.ylabel("Amplitude")
plt.title("Triangular impulse train")
plt.subplot(1, 2, 2)
plt.plot(timeSpace, y3)
plt.xlabel("Time in seconds")
plt.title("System output of source-filter")
Out[226]:
Text(0.5, 1.0, 'System output of source-filter')
In [227]:
F0 = 120
dur = 0.5
timeSpace = np.linspace(0, dur, int(dur*fs))
P = fs//F0
samp = len(timeSpace)
width = 2#no of samples on either side of the peak for the triangular input
y1 = computeResponse(triagInput(P, samp, width = width), samp, r1, theta1)
y2 = computeResponse(y1, samp, r2, theta2)
y3 = computeResponse(y1, samp, r3, theta3)#computing the output
scaled = np.int16(y3/np.max(np.abs(y3)) * 32767)#scaling the output to 16bit integers for writing to wav file
write('qn4_a_120Hz.wav', fs, scaled)

/i/ @ 120Hz

In [228]:
f1 = 270 #formant frequency
f2 = 2290
f3 = 3010
b1 = 100 #bandwith
fs = 16000 #sampling frequency
T = 1.0/fs #samples
In [229]:
F0 = 120
dur = 0.01
timeSpace = np.linspace(0, dur, int(dur*fs))
P = fs//F0
samp = len(timeSpace)
width = 2#no of samples on either side of the peak for the triangular input
In [230]:
r1 = np.exp(-b1*pi*T)
theta1 = 2*pi*f1*T
r2 = np.exp(-b1*pi*T)
theta2 = 2*pi*f2*T
r3 = np.exp(-b1*pi*T)
theta3 = 2*pi*f3*T
In [231]:
y1 = computeResponse(triagInput(P, samp, width = width), samp, r1, theta1)
y2 = computeResponse(y1, samp, r2, theta2)
y3 = computeResponse(y1, samp, r3, theta3)
In [232]:
plt.subplot(1, 2, 1)
plt.plot(timeSpace, triagInput(P, samp, width = width))
plt.xlabel("Time in seconds")
plt.ylabel("Amplitude")
plt.title("Triangular impulse train")
plt.subplot(1, 2, 2)
plt.plot(timeSpace, y3)
plt.xlabel("Time in seconds")
plt.title("System output of source-filter")
Out[232]:
Text(0.5, 1.0, 'System output of source-filter')
In [233]:
F0 = 120
dur = 0.5
timeSpace = np.linspace(0, dur, int(dur*fs))
P = fs//F0
samp = len(timeSpace)
width = 2#no of samples on either side of the peak for the triangular input
y1 = computeResponse(triagInput(P, samp, width = width), samp, r1, theta1)
y2 = computeResponse(y1, samp, r2, theta2)
y3 = computeResponse(y1, samp, r3, theta3)#computing the output
scaled = np.int16(y3/np.max(np.abs(y3)) * 32767)#scaling the output to 16bit integers for writing to wav file
write('qn4_i_120Hz.wav', fs, scaled)

/u/ @ 120Hz

In [234]:
f1 = 300 #formant frequency
f2 = 870
f3 = 2240
b1 = 100 #bandwith
fs = 16000 #sampling frequency
T = 1.0/fs #samples
In [235]:
F0 = 120
dur = 0.01
timeSpace = np.linspace(0, dur, int(dur*fs))
P = fs//F0
samp = len(timeSpace)
width = 2#no of samples on either side of the peak for the triangular input
In [236]:
r1 = np.exp(-b1*pi*T)
theta1 = 2*pi*f1*T
r2 = np.exp(-b1*pi*T)
theta2 = 2*pi*f2*T
r3 = np.exp(-b1*pi*T)
theta3 = 2*pi*f3*T
In [237]:
y1 = computeResponse(triagInput(P, samp, width = width), samp, r1, theta1)
y2 = computeResponse(y1, samp, r2, theta2)
y3 = computeResponse(y1, samp, r3, theta3)
In [238]:
plt.subplot(1, 2, 1)
plt.plot(timeSpace, triagInput(P, samp, width = width))
plt.xlabel("Time in seconds")
plt.ylabel("Amplitude")
plt.title("Triangular impulse train")
plt.subplot(1, 2, 2)
plt.plot(timeSpace, y3)
plt.xlabel("Time in seconds")
plt.title("System output of source-filter")
Out[238]:
Text(0.5, 1.0, 'System output of source-filter')
In [239]:
F0 = 120
dur = 0.5
timeSpace = np.linspace(0, dur, int(dur*fs))
P = fs//F0
samp = len(timeSpace)
width = 2#no of samples on either side of the peak for the triangular input
y1 = computeResponse(triagInput(P, samp, width = width), samp, r1, theta1)
y2 = computeResponse(y1, samp, r2, theta2)
y3 = computeResponse(y1, samp, r3, theta3)#computing the output
scaled = np.int16(y3/np.max(np.abs(y3)) * 32767)#scaling the output to 16bit integers for writing to wav file
write('qn4_u_120Hz.wav', fs, scaled)

Now, we go for the high pitch tones

/a/ @ 220Hz

In [240]:
f1 = 730 #formant frequency
f2 = 1090
f3 = 2440
b1 = 100 #bandwith
fs = 16000 #sampling frequency
T = 1.0/fs #samples
In [241]:
F0 = 220
dur = 0.01
timeSpace = np.linspace(0, dur, int(dur*fs))
P = fs//F0
samp = len(timeSpace)
width = 2#no of samples on either side of the peak for the triangular input
In [242]:
r1 = np.exp(-b1*pi*T)
theta1 = 2*pi*f1*T
r2 = np.exp(-b1*pi*T)
theta2 = 2*pi*f2*T
r3 = np.exp(-b1*pi*T)
theta3 = 2*pi*f3*T
In [243]:
y1 = computeResponse(triagInput(P, samp, width = width), samp, r1, theta1)
y2 = computeResponse(y1, samp, r2, theta2)
y3 = computeResponse(y1, samp, r3, theta3)
In [244]:
plt.subplot(1, 2, 1)
plt.plot(timeSpace, triagInput(P, samp, width = width))
plt.xlabel("Time in seconds")
plt.ylabel("Amplitude")
plt.title("Triangular impulse train")
plt.subplot(1, 2, 2)
plt.plot(timeSpace, y3)
plt.xlabel("Time in seconds")
plt.title("System output of source-filter")
Out[244]:
Text(0.5, 1.0, 'System output of source-filter')
In [245]:
F0 = 220
dur = 0.5
timeSpace = np.linspace(0, dur, int(dur*fs))
P = fs//F0
samp = len(timeSpace)
width = 2#no of samples on either side of the peak for the triangular input
y1 = computeResponse(triagInput(P, samp, width = width), samp, r1, theta1)
y2 = computeResponse(y1, samp, r2, theta2)
y3 = computeResponse(y1, samp, r3, theta3)#computing the output
scaled = np.int16(y3/np.max(np.abs(y3)) * 32767)#scaling the output to 16bit integers for writing to wav file
write('qn4_a_220Hz.wav', fs, scaled)

/i/ @ 220Hz

In [246]:
f1 = 270 #formant frequency
f2 = 2290
f3 = 3010
b1 = 100 #bandwith
fs = 16000 #sampling frequency
T = 1.0/fs #samples
In [247]:
F0 = 220
dur = 0.02
timeSpace = np.linspace(0, dur, int(dur*fs))
P = fs//F0
samp = len(timeSpace)
width = 2#no of samples on either side of the peak for the triangular input
In [248]:
r1 = np.exp(-b1*pi*T)
theta1 = 2*pi*f1*T
r2 = np.exp(-b1*pi*T)
theta2 = 2*pi*f2*T
r3 = np.exp(-b1*pi*T)
theta3 = 2*pi*f3*T
In [249]:
y1 = computeResponse(triagInput(P, samp, width = width), samp, r1, theta1)
y2 = computeResponse(y1, samp, r2, theta2)
y3 = computeResponse(y1, samp, r3, theta3)
In [250]:
plt.subplot(1, 2, 1)
plt.plot(timeSpace, triagInput(P, samp, width = width))
plt.xlabel("Time in seconds")
plt.ylabel("Amplitude")
plt.title("Triangular impulse train")
plt.subplot(1, 2, 2)
plt.plot(timeSpace, y3)
plt.xlabel("Time in seconds")
plt.title("System output of source-filter")
Out[250]:
Text(0.5, 1.0, 'System output of source-filter')
In [251]:
F0 = 220
dur = 0.5
timeSpace = np.linspace(0, dur, int(dur*fs))
P = fs//F0
samp = len(timeSpace)
width = 2#no of samples on either side of the peak for the triangular input
y1 = computeResponse(triagInput(P, samp, width = width), samp, r1, theta1)
y2 = computeResponse(y1, samp, r2, theta2)
y3 = computeResponse(y1, samp, r3, theta3)#computing the output
scaled = np.int16(y3/np.max(np.abs(y3)) * 32767)#scaling the output to 16bit integers for writing to wav file
write('qn4_i_220Hz.wav', fs, scaled)

/u/ @ 220Hz

In [252]:
f1 = 300 #formant frequency
f2 = 870
f3 = 2240
b1 = 100 #bandwith
fs = 16000 #sampling frequency
T = 1.0/fs #samples
In [253]:
F0 = 220
dur = 0.01
timeSpace = np.linspace(0, dur, int(dur*fs))
P = fs//F0
samp = len(timeSpace)
width = 2#no of samples on either side of the peak for the triangular input
In [254]:
r1 = np.exp(-b1*pi*T)
theta1 = 2*pi*f1*T
r2 = np.exp(-b1*pi*T)
theta2 = 2*pi*f2*T
r3 = np.exp(-b1*pi*T)
theta3 = 2*pi*f3*T
In [255]:
y1 = computeResponse(triagInput(P, samp, width = width), samp, r1, theta1)
y2 = computeResponse(y1, samp, r2, theta2)
y3 = computeResponse(y1, samp, r3, theta3)
In [256]:
plt.subplot(1, 2, 1)
plt.plot(timeSpace, triagInput(P, samp, width = width))
plt.xlabel("Time in seconds")
plt.ylabel("Amplitude")
plt.title("Triangular impulse train")
plt.subplot(1, 2, 2)
plt.plot(timeSpace, y3)
plt.xlabel("Time in seconds")
plt.title("System output of source-filter")
Out[256]:
Text(0.5, 1.0, 'System output of source-filter')
In [257]:
F0 = 220
dur = 0.5
timeSpace = np.linspace(0, dur, int(dur*fs))
P = fs//F0
samp = len(timeSpace)
width = 2#no of samples on either side of the peak for the triangular input
y1 = computeResponse(triagInput(P, samp, width = width), samp, r1, theta1)
y2 = computeResponse(y1, samp, r2, theta2)
y3 = computeResponse(y1, samp, r3, theta3)#computing the output
scaled = np.int16(y3/np.max(np.abs(y3)) * 32767)#scaling the output to 16bit integers for writing to wav file
write('qn4_u_220Hz.wav', fs, scaled)

Please listen to the following files for the sounds

/a/ @120Hz - qn4_a_120Hz.wav

/i/ @120Hz - qn4_i_120Hz.wav

/u/ @120Hz - qn4_u_120Hz.wav

/a/ @220Hz - qn4_a_220Hz.wav

/i/ @220Hz - qn4_i_220Hz.wav

/u/ @220Hz - qn4_u_220Hz.wav

Comments :

  1. Since F1, F2, F3 remain the same for each vowel, we see that the oscillations in a given 1/F0 period don't change for a given vowel.
  2. As F0 increases, we see that the pitch increases of the sound. Further, each period now repeats faster resulting in that higher pitch sound.
  3. The sounds are hard to distinguish as 'a', 'i' and 'u' but after careful listening for some time one can make them out.(Particularly if we listen to them in one continuous hearing we can then distinguish them)
  4. The sound very robotic and monotonous.

Optional part

Glotal pulse shaping

Glottal pulse shaping can be acheived by using the filter of form:

$H(z) = \frac{1}{(1 - az^{-1})^2} $

=> $\frac{Y(z)}{X(z)} = \frac{1}{(1 - az^{-1})^2} $

=> $Y(z)(1 - 2az^{-1} + a^2z^{-2}) = X(z)$

=> $y[n] - 2ay[n-1] + a^2y[n-2] = x[n]$

In [258]:
def glottalPulse(input, a):
    #we use the above difference equation to model glottal pulse shaping
    #we prefer poles very close to the unit circle!
    y = np.zeros_like(input)
    y[0] = input[0]
    y[1] = input[1] + 2*a*y[0]
    y[2] = input[2] + 2*a*y[1] - (a*a*y[0])
    for i in range(3, len(input)):
        y[i] = input[i] + 2*a*y[i-1] - (a*a*y[i-2])
    return y

We do the above for sound 'a' @120Hz

In [259]:
f1 = 730 #formant frequency
f2 = 1090
f3 = 2440
b1 = 100 #bandwith
fs = 16000 #sampling frequency
T = 1.0/fs #samples
In [260]:
F0 = 120
dur = 0.02
timeSpace = np.linspace(0, dur, int(dur*fs))
P = fs//F0
samp = len(timeSpace)
width = 2#no of samples on either side of the peak for the triangular input
In [261]:
r1 = np.exp(-b1*pi*T)
theta1 = 2*pi*f1*T
r2 = np.exp(-b1*pi*T)
theta2 = 2*pi*f2*T
r3 = np.exp(-b1*pi*T)
theta3 = 2*pi*f3*T
In [262]:
yI = glottalPulse(triagInput(P, samp, width = width), 0.9)
y1 = computeResponse(yI, samp, r1, theta1)
y2 = computeResponse(y1, samp, r2, theta2)
y3 = computeResponse(y1, samp, r3, theta3)
In [263]:
plt.subplot(1, 2, 1)
plt.plot(timeSpace, yI)
plt.xlabel("Time in seconds")
plt.ylabel("Amplitude")
plt.title("Triangular impulse train after being shaped by the glottas")
plt.subplot(1, 2, 2)
plt.plot(timeSpace, y3)
plt.xlabel("Time in seconds")
plt.title("System output of source-filter")
Out[263]:
Text(0.5, 1.0, 'System output of source-filter')
In [264]:
F0 = 120
dur = 0.5
timeSpace = np.linspace(0, dur, int(dur*fs))
P = fs//F0
samp = len(timeSpace)
width = 2#no of samples on either side of the peak for the triangular input
y1 = computeResponse(triagInput(P, samp, width = width), samp, r1, theta1)
y2 = computeResponse(y1, samp, r2, theta2)
y3 = computeResponse(y1, samp, r3, theta3)#computing the output
scaled = np.int16(y3/np.max(np.abs(y3)) * 32767)#scaling the output to 16bit integers for writing to wav file
write('qn4_a_120Hz_with_glottal.wav', fs, scaled)

Please listen to qn4_a_120Hz_with_glottal.wav

Now with lip radiation added

Glottal pulse shaping can be cascade with the lip radiation model , can be acheived by using the filter of form:

$H(z) = \frac{1-bz^{-1}}{(1 - az^{-1})^2} $

=> $\frac{Y(z)}{X(z)} = \frac{1-bz^{-1}}{(1 - az^{-1})^2} $

=> $Y(z)(1 - 2az^{-1} + a^2z^{-2}) = X(z)1-bz^{-1}$

=> $y[n] - 2ay[n-1] + a^2y[n-2] = x[n] - bx[n-1]$

In [265]:
def GlottalLip(input, a,b ):
    #we use the above difference equation to find out the system output
    y = np.zeros_like(input)
    y[0] = input[0]
    y[1] = input[1] - b*input[0] + 2*a*y[0]
    y[2] = input[2] - b*input[1]+ 2*a*y[1] - (a*a*y[0])
    for i in range(3, len(input)):
        y[i] = input[i] - b*input[i-1] + 2*a*y[i-1] - (a*a*y[i-2])
    return y
In [266]:
f1 = 730 #formant frequency
f2 = 1090
f3 = 2440
b1 = 100 #bandwith
fs = 16000 #sampling frequency
T = 1.0/fs #samples
F0 = 120
dur = 0.02
timeSpace = np.linspace(0, dur, int(dur*fs))
P = fs//F0
samp = len(timeSpace)
width = 2#no of samples on either side of the peak for the triangular input
r1 = np.exp(-b1*pi*T)
theta1 = 2*pi*f1*T
r2 = np.exp(-b1*pi*T)
theta2 = 2*pi*f2*T
r3 = np.exp(-b1*pi*T)
theta3 = 2*pi*f3*T
In [267]:
yI = GlottalLip(triagInput(P, samp, width = width), 0.9, 0.8)
y1 = computeResponse(yI, samp, r1, theta1)
y2 = computeResponse(y1, samp, r2, theta2)
y3 = computeResponse(y1, samp, r3, theta3)
In [268]:
plt.subplot(1, 2, 1)
plt.plot(timeSpace, yI)
plt.xlabel("Time in seconds")
plt.ylabel("Amplitude")
plt.title("Triangular impulse train after glottal-lip filter")
plt.subplot(1, 2, 2)
plt.plot(timeSpace, y3)
plt.xlabel("Time in seconds")
plt.title("System output of source-filter")
Out[268]:
Text(0.5, 1.0, 'System output of source-filter')
In [269]:
F0 = 120
dur = 0.5
timeSpace = np.linspace(0, dur, int(dur*fs))
P = fs//F0
samp = len(timeSpace)
width = 2#no of samples on either side of the peak for the triangular input
y1 = computeResponse(triagInput(P, samp, width = width), samp, r1, theta1)
y2 = computeResponse(y1, samp, r2, theta2)
y3 = computeResponse(y1, samp, r3, theta3)#computing the output
scaled = np.int16(y3/np.max(np.abs(y3)) * 32767)#scaling the output to 16bit integers for writing to wav file
write('qn4_a_120Hz_with_glottallip.wav', fs, scaled)

Please listen to qn4_a_120Hz_with_glottallip.wav

Adding jitter to the sound

In [270]:
freqDelta = np.random.rand(samp)